{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1470c0cd",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Use the %pip magic so the package is installed into the environment of the running kernel\n",
    "# (a `!pip` shell call may resolve to a different Python installation).\n",
    "%pip install pandarallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51612777",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Install the mapshaper CLI globally, pinned to version 0.5.67. Later cells invoke\n",
    "# `mapshaper-xl` (presumably shipped with this package -- confirm).\n",
    "!npm i -g mapshaper@0.5.67"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59569965",
   "metadata": {},
   "outputs": [],
   "source": [
    "import geopandas\n",
    "import pandas\n",
    "import os\n",
    "import json\n",
    "from pandarallel import pandarallel\n",
    "import topojson as tp\n",
    "pandarallel.initialize(progress_bar=True, use_memory_fs=False, nb_workers=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3e4b434",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the path of every file found under base_dir (os.walk descends into sub-directories).\n",
    "base_dir = './../../datasets/seed/wdpa/wdpa/simp'\n",
    "f = [\n",
    "    f'{dirpath}/{filename}'\n",
    "    for dirpath, _dirnames, filenames in os.walk(base_dir)\n",
    "    for filename in filenames\n",
    "]\n",
    "\n",
    "# Read each file and stack the results into a single GeoDataFrame.\n",
    "gdf = geopandas.GeoDataFrame(\n",
    "    pandas.concat([geopandas.read_file(path) for path in f])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8fe245e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "gdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4b7dedf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the JSON files under the current directory (recursively) whose name contains an 'r',\n",
    "# then read them all into one GeoDataFrame.\n",
    "# NOTE(review): `'r' in filename` matches an 'r' anywhere in the name; another cell globs\n",
    "# `./data/r*.json`, so an 'r' prefix may be what was intended -- confirm.\n",
    "base_dir = '.'\n",
    "f = []\n",
    "for (dirpath, dirnames, filenames) in os.walk(base_dir):\n",
    "    for filename in filenames:\n",
    "        # Test the real extension: a substring check would also accept names such as\n",
    "        # 'x.json.bak' that are not JSON files.\n",
    "        if filename.endswith('.json') and 'r' in filename:\n",
    "            f.append(f'{dirpath}/{filename}')\n",
    "gdf = pandas.concat([\n",
    "    geopandas.read_file(file)\n",
    "    for file in f\n",
    "]).pipe(geopandas.GeoDataFrame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e50f818",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76745869",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the path of every file found under base_dir_in (os.walk descends into sub-directories).\n",
    "base_dir_in = './../../datasets/seed/wdpa/wdpa/geojson'\n",
    "f_in = [\n",
    "    f'{dirpath}/{filename}'\n",
    "    for dirpath, _dirnames, filenames in os.walk(base_dir_in)\n",
    "    for filename in filenames\n",
    "]\n",
    "\n",
    "# Read each file and stack the results into a single GeoDataFrame.\n",
    "gdf_in = geopandas.GeoDataFrame(\n",
    "    pandas.concat([geopandas.read_file(path) for path in f_in])\n",
    ")\n",
    "\n",
    "gdf_in"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05d1517c",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_in.groupby('IUCN_CAT').agg({'WDPAID': ['count']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65aee653",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Whole-percent drop from 200076 to 196554: 100 minus the ratio rounded to a whole percent.\n",
    "# NOTE(review): both counts are hard-coded; presumably copied from earlier outputs -- confirm.\n",
    "print(f'{100 - round((196554/200076)*100)}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a3674e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Whole-percent drop from 254424 to 200076: 100 minus the ratio rounded to a whole percent.\n",
    "# NOTE(review): both counts are hard-coded; presumably copied from earlier outputs -- confirm.\n",
    "print(f'{100 - round((200076/254424)*100)}%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "341aab0d",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Collect every shapefile under base_dir_orig (recursively) and read them into one GeoDataFrame.\n",
    "base_dir_orig = './../../datasets/seed/wdpa/wdpa/data'\n",
    "f_orig = []\n",
    "for (dirpath, dirnames, filenames) in os.walk(base_dir_orig):\n",
    "    for filename in filenames:\n",
    "        # Test the real extension: `'.shp' in filename` also matched sidecar files such as\n",
    "        # 'name.shp.xml', which are not shapefiles.\n",
    "        if filename.endswith('.shp'):\n",
    "            f_orig.append(f'{dirpath}/{filename}')\n",
    "\n",
    "gdf_orig = pandas.concat([\n",
    "    geopandas.read_file(file)\n",
    "    for file in f_orig\n",
    "]).pipe(geopandas.GeoDataFrame)\n",
    "\n",
    "gdf_orig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3907684c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace gdf_orig with the contents of a single shapefile from the working directory.\n",
    "# NOTE(review): files named wdpa_<index>.shp are written by a cell further down\n",
    "# (`dataset.to_file(f\"wdpa_{index}.shp\", ...)`), so on a fresh top-to-bottom run this file\n",
    "# must already exist on disk.\n",
    "base_dir_orig = './wdpa_0.shp'\n",
    "gdf_orig = geopandas.read_file(base_dir_orig)\n",
    "gdf_orig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "befb37a7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "gdf_orig.geometry.sindex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62fec47a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the topology once and keep it in `topo`: the following cell calls\n",
    "# `topo.toposimplify(...)` and would raise NameError if the object were discarded here.\n",
    "topo = tp.Topology(data=gdf_orig, prequantize=False)\n",
    "topo.to_svg()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a127cc16",
   "metadata": {},
   "outputs": [],
   "source": [
    "t = topo.toposimplify(5).to_gdf()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d392a020",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reference only (not executed): this line looks like a Makefile recipe -- `$$f`,\n",
    "# `$$filename` and `$@` are make-style variables -- and without a leading `!` it is not\n",
    "# valid Python, so running the cell as code raised a SyntaxError.\n",
    "# mapshaper-xl 16gb -i $$f name=\"$$filename\" -split ISO3 -o $@ format=geojson precision=0.0000001;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3db2de28",
   "metadata": {},
   "outputs": [],
   "source": [
    "def is_valid(geom):\n",
    "    \"\"\"Return the ``is_valid`` attribute of a single geometry object.\n",
    "\n",
    "    This function is passed to ``parallel_apply`` over ``gdf_orig.geometry`` in a later cell.\n",
    "    \"\"\"\n",
    "    return geom.is_valid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d0142d5",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Evaluate `is_valid` on every geometry of gdf_orig through pandarallel's `parallel_apply`.\n",
    "# NOTE(review): the result is stored in `gdf_orig_2`, but no other cell visible in this\n",
    "# notebook reads that name.\n",
    "gdf_orig_2 = gdf_orig.geometry.parallel_apply(is_valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4c14ec5",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_orig.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cc294a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count features by geometry validity.\n",
    "# The flag is computed here on a copy (gdf_orig itself is not modified) because the cell that\n",
    "# adds a `geom_is_valid` column only appears further down; on a top-to-bottom run the column\n",
    "# would not exist yet and the groupby would raise KeyError.\n",
    "(\n",
    "    gdf_orig\n",
    "    .assign(geom_is_valid=gdf_orig['geometry'].is_valid)\n",
    "    .groupby('geom_is_valid')\n",
    "    .agg({'WDPAID': ['count']})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d666b8b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_orig['geom_is_valid']=gdf_orig['geometry'].is_valid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93a4ae71",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_orig.groupby('IUCN_CAT').agg({'WDPAID': ['count']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e986ee2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One sub-frame per distinct IUCN_CAT value, in the order groupby yields them.\n",
    "test_split = [group for _category, group in gdf_orig.groupby('IUCN_CAT')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a7729b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write each split to its own shapefile (wdpa_0.shp, wdpa_1.shp, ...) tagged as EPSG:4326.\n",
    "# The CRS goes through `to_file`'s `crs` argument: `crs_wkt` expects a WKT string rather than a\n",
    "# PROJ-style dict, and recent geopandas already passes its own `crs_wkt` to the writer.\n",
    "for index, dataset in enumerate(test_split):\n",
    "    dataset.to_file(f\"wdpa_{index}.shp\", crs=\"EPSG:4326\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "772e21b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_orig.groupby('ISO3').agg({'WDPAID': ['count']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdb76e82",
   "metadata": {},
   "outputs": [],
   "source": [
    "from shapely.geometry import box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "52ba9f9e",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Allocating 16 GB of heap memory\n",
      "[o] Wrote data/1.json\n",
      "[o] Wrote data/2.json\n",
      "[o] Wrote data/3.json\n",
      "[o] Wrote data/4.json\n",
      "[o] Wrote data/5.json\n",
      "[o] Wrote data/6.json\n",
      "[o] Wrote data/7.json\n",
      "[o] Wrote data/8.json\n",
      "[o] Wrote data/9.json\n",
      "[o] Wrote data/10.json\n",
      "[o] Wrote data/11.json\n"
     ]
    }
   ],
   "source": [
    "# Load all source shapefiles as one merged layer, give every feature a field\n",
    "# group = trunc(id / 20000) + 1, split the layer on that field and write one GeoJSON file per\n",
    "# group into data/ (ndjson output, precision 0.0001, overwriting existing files via `force`).\n",
    "# NOTE(review): assumes `this.id` is the zero-based feature index, i.e. batches of 20000\n",
    "# features per file -- confirm against the mapshaper docs.\n",
    "!mapshaper-xl 16gb -i ./../../datasets/seed/wdpa/wdpa/data/*.shp no-topology combine-files \\\n",
    "    -merge-layers \\\n",
    "    -each \"group = Math.trunc(this.id/20000)+1\" \\\n",
    "    -split group \\\n",
    "    -o data/ format=geojson precision=0.0001 force ndjson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40408027",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Load ./data/r*.json as one merged layer (with `snap`) and print its `-info` summary;\n",
    "# nothing is written.\n",
    "# NOTE(review): the split cell's recorded output lists data/1.json ... data/11.json, none of\n",
    "# which match `r*.json` -- confirm that files with an 'r' prefix exist in ./data.\n",
    "!mapshaper-xl 16gb -i ./data/r*.json snap combine-files -merge-layers -info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c296c5a8",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Allocating 16 GB of heap memory\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 3,403 intersections; 11,536 intersections could not be repaired\n",
      "[filter-islands] Removed 46,377 islands\n",
      "[filter-slivers] Removed 22,243 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 4,676 / 9,158 slivers using 0.039+ sqkm variable threshold\n",
      "[clean] Retained 19,298 of 22,066 features\n",
      "[o] Wrote 10.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 1,372 intersections; 3,621 intersections could not be repaired\n",
      "[filter-islands] Removed 1,719 islands\n",
      "[filter-slivers] Removed 1,086 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 2,011 / 2,860 slivers using 0.062+ sqkm variable threshold\n",
      "[clean] Retained 4,086 of 4,219 features\n",
      "[o] Wrote 11.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 2,148 intersections; 77,659 intersections could not be repaired\n",
      "[filter-islands] Removed 50,711 islands\n",
      "[filter-slivers] Removed 31,763 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 17,423 / 26,375 slivers using 0.043+ sqkm variable threshold\n",
      "[clean] Retained 21,841 of 24,461 features\n",
      "[o] Wrote 1.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 951 intersections; 7,499 intersections could not be repaired\n",
      "[filter-islands] Removed 15,365 islands\n",
      "[filter-slivers] Removed 16,229 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 3,261 / 5,251 slivers using 8700+ sqm variable threshold\n",
      "[clean] Retained 20,388 of 23,238 features\n",
      "[o] Wrote 2.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 1,003 intersections; 5,022 intersections could not be repaired\n",
      "[filter-islands] Removed 12,007 islands\n",
      "[filter-slivers] Removed 10,472 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 3,493 / 8,129 slivers using 0.018+ sqkm variable threshold\n",
      "[clean] Retained 18,969 of 22,326 features\n",
      "[o] Wrote 3.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 1,863 intersections; 10,434 intersections could not be repaired\n",
      "[filter-islands] Removed 31,749 islands\n",
      "[filter-slivers] Removed 16,898 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 5,484 / 9,328 slivers using 0.024+ sqkm variable threshold\n",
      "[clean] Retained 18,911 of 22,512 features\n",
      "[o] Wrote 4.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 2,373 intersections; 22,069 intersections could not be repaired\n",
      "[filter-islands] Removed 35,165 islands\n",
      "[filter-slivers] Removed 19,654 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 8,071 / 18,018 slivers using 0.017+ sqkm variable threshold\n",
      "[clean] Retained 19,918 of 22,973 features\n",
      "[o] Wrote 5.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 17,091 intersections; 85,826 intersections could not be repaired\n",
      "[filter-islands] Removed 34,915 islands\n",
      "[filter-slivers] Removed 33,902 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 17,451 / 30,380 slivers using 0.016+ sqkm variable threshold\n",
      "[clean] Retained 21,714 of 24,326 features\n",
      "[o] Wrote 6.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 0 intersections; 38,839 intersections could not be repaired\n",
      "[filter-islands] Removed 21,977 islands\n",
      "[filter-slivers] Removed 19,268 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 9,932 / 16,234 slivers using 0.018+ sqkm variable threshold\n",
      "[clean] Retained 21,759 of 24,108 features\n",
      "[o] Wrote 7.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 4,423 intersections; 58,992 intersections could not be repaired\n",
      "[filter-islands] Removed 85,327 islands\n",
      "[filter-slivers] Removed 59,178 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 12,936 / 19,879 slivers using 0.016+ sqkm variable threshold\n",
      "[clean] Retained 20,540 of 23,257 features\n",
      "[o] Wrote 8.json\n",
      "[i] Snapped 0 points\n",
      "[simplify] Repaired 1,264 intersections; 38,009 intersections could not be repaired\n",
      "[filter-islands] Removed 181,090 islands\n",
      "[filter-slivers] Removed 101,585 slivers using 0.01+ sqkm variable threshold\n",
      "[clean] Removed 15,035 / 25,900 slivers using 6200+ sqm variable threshold\n",
      "[clean] Retained 18,010 of 22,360 features\n",
      "[o] Wrote 9.json\n"
     ]
    }
   ],
   "source": [
    "!mapshaper-xl 16gb -i ./data/*.json snap -simplify 20% planar keep-shapes -filter-islands min-vertices=3 min-area=10000m2 remove-empty -filter-slivers min-area=10000m2 remove-empty -clean rewind -o format=geojson force ndjson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "865b1dcf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Allocating 16 GB of heap memory\n",
      "[info] \n",
      "==============================================\n",
      "Layer:    [unnamed layer]\n",
      "----------------------------------------------\n",
      "Type:     polygon\n",
      "Records:  205,434\n",
      "Bounds:   -180,-85.4119,180,86.4537\n",
      "CRS:      +proj=longlat +datum=WGS84\n",
      "Source:   ./10.json\n",
      "\n",
      "Attribute data\n",
      "------------+---------------------------------\n",
      " Field      | First value\n",
      "------------+---------------------------------\n",
      " CONS_OBJ   | 'Not Applicable'\n",
      " DESIG      | 'Private Conservation'\n",
      " DESIG_ENG  | 'Private Conservation'\n",
      " DESIG_TYPE | 'National'\n",
      " GIS_AREA   |         0.184653691756769\n",
      " GIS_M_AREA |         0\n",
      " GOV_TYPE   | 'Non-profit organisations'\n",
      " group      |        10\n",
      " INT_CRIT   | 'Not Applicable'\n",
      " ISO3       | 'USA'\n",
      " IUCN_CAT   | 'V'\n",
      " MANG_AUTH  | 'Non-Governmental Organization'\n",
      " MANG_PLAN  | 'Not Reported'\n",
      " MARINE     | '0'\n",
      " METADATAID |      1848\n",
      " NAME       | 'Winslow Woods'\n",
      " NO_TAKE    | 'Not Applicable'\n",
      " NO_TK_AREA |         0\n",
      " ORIG_NAME  | 'Winslow Woods'\n",
      " OWN_TYPE   | 'Non-profit organisations'\n",
      " PA_DEF     | '1'\n",
      " PARENT_ISO | 'USA'\n",
      " REP_AREA   |         0.184653691757284\n",
      " REP_M_AREA |         0\n",
      " STATUS     | 'Designated'\n",
      " STATUS_YR  |      2002\n",
      " SUB_LOC    | 'US-ME'\n",
      " SUPP_INFO  | 'Not Applicable'\n",
      " VERIF      | 'State Verified'\n",
      " WDPA_PID   | '555661032'\n",
      " WDPAID     | 555661032\n",
      "------------+---------------------------------\n",
      "\n",
      "[o] Wrote symp-wdpa.shp\n",
      "[o] Wrote symp-wdpa.shx\n",
      "[o] Wrote symp-wdpa.dbf\n",
      "[o] Wrote symp-wdpa.prj\n"
     ]
    }
   ],
   "source": [
    "!mapshaper-xl 16gb -i ./*.json combine-files -merge-layers -info -o symp-wdpa.shp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb9a8ebf",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
